/* This replication code works on the following datasets:
1. Publically available data downloaded from https://fred.stlouisfed.org/series/ITNETUSERP2USA
2. Publically available data downloaded from https://www.census.gov/foreign-trade/balance/index.html
3. Publically available data downloaded from https://www.census.gov/manufacturing/m3/historical/timeseries.html
4. A pseudo dataset: lbd_full_pseudo.sas7bdat (SAS data)
5. A pseudo dataset: lrd_full_pseudo.sas7bdat (SAS data)
*/

libname ipo "X:\your_directory";

/* Figure 1: Trends in Chinese Import Penetration and Internet Access */
/* Table 1: Changes in Chinese Import Penetration and Internet Access from Pre- to Post-2000 */

*Internet users per 100 people;
*Download the publically available data from https://fred.stlouisfed.org/series/ITNETUSERP2USA;

proc import datafile="X:\your_directory\ITNETUSERP2USA.csv"
			out=ITNETUSERP2USA dbms=csv replace;
run; 

proc sql;
	create table internet as
	select year(date) as year,
	ITNETUSERP2USA as internet_user
	from ITNETUSERP2USA
	where 1990<=year(date)<=2014;
quit;

proc sgplot data=internet;
	series x=year y=internet_user / lineattrs=(color=blue);
	refline 2000 / axis=x LINEATTRS=(pattern=2);
run;

data internet;
	set internet;
	post2000=0;
	if year>2000 then post2000=1;
run;

proc ttest data=internet;
	var internet_user;
	class post2000;
run;

*China import penentration;
*Download the publically available data from
1. https://www.census.gov/foreign-trade/balance/index.html
2. https://www.census.gov/manufacturing/m3/historical/timeseries.html
;

proc import datafile="X:\your_directory\country.xlsx"
			out=country dbms=xlsx replace;
run; 

proc sql;
	create table country as
	select input(year,best32.) as year, ctyname,
	IYR as import,
	EYR as export
	from country;
quit;

proc import datafile="X:\your_directory\naicsvsp.xlsx"
			out=naicsvsp dbms=xlsx replace; getnames=no;
run;

proc sql;
	create table naicsvsp as
	select B as year,
	C+D+E+input(F,best12.)+input(G,best12.)+input(H,best12.)+input(I,best12.)+input(J,best12.)+input(K,best12.)+input(L,best12.)+input(M,best12.)+input(N,best12.) as tvs
	from naicsvsp
	where A="UMTMVS";
quit;

proc sql;
	create table expenditure as
	select a.year, b.tvs+a.import-a.export as expenditure
	from country as a
	left join naicsvsp as b
	on a.year=b.year
	where a.ctyname="World, Not Seasonally Adjusted";
quit;

proc sql;
	create table china as
	select a.year, (a.import-a.export)/b.expenditure as china_import
	from country as a
	left join expenditure as b
	on a.year=b.year
	where a.ctyname="China" and 1990<=a.year<=2014;
quit;

proc sgplot data=china;
	series x=year y=china_import / lineattrs=(color=blue);
	refline 2000 / axis=x LINEATTRS=(pattern=2);
run;

data china;
	set china;
	post2000=0;
	if year>2000 then post2000=1;
run;

proc ttest data=china;
	var china_import;
	class post2000;
run;

/* Read the pseudo LBD data */

data lbd;
  set ipo.lbd_full_pseudo;
  if ipo=. then ipo=0;
  if public=. then public=0;
  if private=. then private=0;
  if ma=. then ma=0;
  naics2=int(naics/10000);
  if year>2000 then late=1; else late=0;
  if st not in ("AK","AL","AR","AZ","CA","CO","CT",
  "DC","DE","FL","GA","HI","IA","ID","IL","IN","KS",
  "KY","LA","MA","MD","ME","MI","MN","MO","MS","MT",
  "NC","ND","NE","NH","NJ","NM","NV","NY","OH","OK",
  "OR","PA","RI","SC","SD","TN","TX","UT","VA","VT",
  "WA","WI","WV","WY") then st ="NA";
  if emp<20 then size=1;
  else if emp<100 then size=2;
  else if emp<250 then size=3;
  else if emp<500 then size=4;
  else if emp<1000 then size=5;
  else if emp<2500 then size=6;
  else if emp<5000 then size=7;
  else if emp<10000 then size=8;
  else size=9;
  if emp>=20 then emp20=1; else emp20=0;
  if emp>=200 then emp200=1; else emp200=0;
  where emp>0 and naics ne .;
run;

/* Figure 2: Fraction of IPO, acquired, public, and private firms in LBD sample */
/* Figure 6: number of fraction of firms with at least 200 employees in LBD sample */
/* Figure 8: avg number of employees of IPO/acquired/public/private firms in LBD sample */

proc sql;
  create table figure2_6_8 as
  select distinct year, count(firmid) as nfirm, 
  sum(ipo) as nipo, sum(ma) as nacq, sum(public) as npub, sum(private) as nprv,
  sum(case when ipo=1 then emp end)/sum(ipo) as empipo,
  sum(case when ma=1 then emp end)/sum(ma) as empacq,
  sum(case when public=1 then emp end)/sum(public) as emppub,
  sum(case when private=1 then emp end)/sum(private) as empprv,
  count(case when emp>=20 then firmid end) as n20emp, count(case when emp>=200 then firmid end) as n200emp
  from lbd
  group by year;
quit;

data figure2_6_8;
  set figure2_6_8;
  pctipo=nipo/nfirm;
  pctacq=nacq/nfirm;
  pctpub=npub/nfirm;
  pctprv=nprv/nfirm;
  pct20emp=n20emp/nfirm;
  pct200emp=n200emp/nfirm;
  drop nipo nacq npub nprv n20emp n200emp;
run;

/* Figure 3 raw and percentage change in IPO prob from pre to post 2000 across naics and states: LBD */

proc sql;
  create table figure3ab as
  select distinct naics2, year, late, sum(ipo)/count(firmid) as pctipo
  from lbd
  group by naics2, year;
quit;

proc sql;
  create table figure3ab as
  select distinct naics2, late, mean(pctipo) as pctipo
  from figure3ab
  group by naics2, late;
quit;

proc sql;
	create table figure3ab as
	select a.naics2,
	b.pctipo-a.pctipo as chg_pct_ipo,
	(b.pctipo-a.pctipo)/a.pctipo as pctchg_pct_ipo
	from figure3ab as a
	left join figure3ab as b
	on a.naics2=b.naics2
	where a.late=0 and b.late=1;
quit;

proc sql;
  create table figure3cd as
  select distinct st, year, late, sum(ipo)/count(firmid) as pctipo
  from lbd
  group by st, year;
quit;

proc sql;
  create table figure3cd as
  select distinct st, late, mean(pctipo) as pctipo
  from figure3cd
  group by st, late;
quit;

proc sql;
	create table figure3cd as
	select a.st,
	b.pctipo-a.pctipo as chg_pct_ipo,
	(b.pctipo-a.pctipo)/a.pctipo as pctchg_pct_ipo
	from figure3cd as a
	left join figure3cd as b
	on a.st=b.st
	where a.late=0 and b.late=1;
quit;

/* Figure 4 raw and percentage change in IPO prob from pre to post 2000 across size groups: LBD */

proc sql;
  create table figure4 as
  select distinct size, year, late, sum(ipo)/count(firmid) as pctipo
  from lbd
  group by size, year;
quit;

proc sql;
  create table figure4 as
  select distinct size, late, mean(pctipo) as pctipo
  from figure4
  group by size, late;
quit;

proc sql;
	create table figure4 as
	select a.size,
	b.pctipo-a.pctipo as chg_pct_ipo,
	(b.pctipo-a.pctipo)/a.pctipo as pctchg_pct_ipo
	from figure4 as a
	left join figure4 as b
	on a.size=b.size
	where a.late=0 and b.late=1;
quit;

/* Read the pseudo ASM/CMF data */

data lrd;
  set ipo.Lrd_full_pseudo;
  keep firmid year vc ipo public private ma tfp sales sales_growth censusyear naics3 nplant late exit;
run;

data lrd;
  set lrd;
  if tfp>0.05 then hightfp=1; else hightfp=0;
  if sales>10000 then highsales=1; else highsales=0;
  if sales_growth>0.15 then highsalesgrowth=1; else highsalesgrowth=0;
run;

/* Figure 7: average TFP/sales/sales_growth and fraction of high TFP/sales/sales_growth over years: LRD */

proc sql;
  create table figure7 as
  select distinct year,
  count(firmid) as nfirm,
  mean(tfp) as tfp,
  mean(sales) as sales,
  mean(sales_growth) as sales_growth,
  count(case when hightfp=1 then firmid end)/count(firmid) as pcthightfp,
  count(case when highsales=1 then firmid end)/count(firmid) as pcthighsales,
  count(case when highsalesgrowth=1 then firmid end)/count(firmid) as pcthighsalesgrowth
  from lrd
  group by year;
quit;

/* Figure 9: changes in TFP/sales/sales_growth from pre to post 2000 by exit choice: LRD */

proc sql;
  create table figure9 as
  select distinct exit, year, late,
  mean(tfp) as tfp,
  mean(sales) as sales,
  mean(sales_growth) as sales_growth
  from lrd
  where public=0
  group by exit, year;
quit;

proc sql;
  create table figure9 as
  select distinct exit, late,
  mean(tfp) as tfp,
  mean(sales) as sales,
  mean(sales_growth) as sales_growth
  from figure9
  group by exit, late;
quit;

proc sql;
	create table figure9 as
	select a.exit,
	b.tfp-a.tfp as chg_tfp,
	b.sales-a.sales as chg_sales,
	b.sales_growth-a.sales_growth as chg_sales_growth
	from figure9 as a
	left join figure9 as b
	on a.exit=b.exit
	where a.late=0 and b.late=1;
quit;

/* Figure 5: Fraction of small firms among high-TFP/sales-growth firms in pre and post 2000 periods: LRD */

proc sql;
  create table lrd as
  select a.*, b.emp, b.emp20, b.emp200
  from lrd as a
  left join lbd as b
  on a.firmid=b.firmid and a.year=b.year;
quit;

data lrd;
  set lrd;
    if private=1 and emp20=0 then small20=1; else small20=0;
    if private=1 and emp20=1 then large20=1; else large20=0;
    if small20=1 and hightfp=1 then small20hightfp=1; else small20hightfp=0;
    if small20=1 and highsalesgrowth=1 then small20highsalesgrowth=1; else small20highsalesgrowth=0;
    if large20=1 and hightfp=1 then large20hightfp=1; else large20hightfp=0;
    if large20=1 and highsalesgrowth=1 then large20highsalesgrowth=1; else large20highsalesgrowth=0;
run;

*T-tests for pre/post-2000;

%macro temp;

%let var_list = hightfp highsalesgrowth;

%do i=1 %to %sysfunc(countw(&var_list));
%let var=%scan(&var_list, &i);

data temp&i.;
  set lrd;
  if year>2000 then post=1; else post=0;
  where &var.=1;
run;

proc sort data=temp&i.;
  by post;
run;

ods output statistics=statistics&i. ttests=ttests&i.;

proc ttest data=temp&i.;
  var small20&var. large20&var. ;
  class post;
run;

ods output close;

proc sql;
  create table temp&i. as
  select a.variable, a.mean as pre, b.mean as post, c.tValue, c.Probt
  from statistics&i. as a
  left join statistics&i. as b
  on a.variable=b.variable and b.class="1"
  left join ttests&i. as c
  on a.variable=c.variable and c.method="Pooled"
  where a.class="0";
quit;

%end;
%mend;
%temp;

data figure5;
  set temp2 temp1;
run;

*************************************;

proc export outfile="X:\your_directory\figure2_6_8.xls"
  data=figure2_6_8 dbms=xls replace;
run;

proc export outfile="X:\your_directory\figure3ab.xls"
  data=figure3ab dbms=xls replace;
run;

proc export outfile="X:\your_directory\figure3cd.xls"
  data=figure3cd dbms=xls replace;
run;

proc export outfile="X:\your_directory\figure4.xls"
  data=figure4 dbms=xls replace;
run;

proc export outfile="X:\your_directory\figure7.xls"
  data=figure7 dbms=xls replace;
run;

proc export outfile="X:\your_directory\figure9.xls"
  data=figure9 dbms=xls replace;
run;

proc export outfile="X:\your_directory\figure5.xls"
  data=figure5 dbms=xls replace;
run;
